Random KFG Uniform Distribution:
— Pendant Pendant Sum Monte Carlo Simulation

1. Creating a Random Uniform Distribution

1.1 The Existing FieldGuide Distribution

The first dataframe to build is a database of sums using the khipus in the existing KFG.

Code

import math
import random
from random import choices

import numpy as np
import pandas as pd
import khipu_kamayuq as kamayuq  # A Khipu Maker is known (in Quechua) as a Khipu Kamayuq
import khipu_qollqa as kq
from pandas import Series, DataFrame

# Plotly
import plotly
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
import plotly.express as px
import plotly.figure_factory as ff
plotly.offline.init_notebook_mode(connected = False)

from monte_carlo import DiscreteDistributionSampler, PendantSummer, StrawmanKhipu

Code

# Fetch the khipus; only the list of khipu objects is used in this cell.
khipu_dict, all_khipus = kamayuq.fetch_khipus()

# Wrap each existing khipu as a StrawmanKhipu built from the knotted values of its pendant cords.
strawmen_kfg_khipu = []
for kfg_khipu in all_khipus:
    khipu_name = kfg_khipu.name()
    pendant_values = [pendant.knotted_value() for pendant in kfg_khipu.pendant_cords()]
    strawmen_kfg_khipu.append(StrawmanKhipu(khipu_name, "KFG", pendant_values))

# One dataframe row per khipu, using the columns StrawmanKhipu declares.
kfg_rows = [strawman.dataframe_tuple() for strawman in strawmen_kfg_khipu]
strawmen_kfg_df = pd.DataFrame(kfg_rows, columns=StrawmanKhipu.dataframe_columns())
strawmen_kfg_df.head()

	name	source	num_pendants	mean_cord_value	stdev_cord_value	num_right_sums	num_left_sums	num_sums	mean_num_summands	stdev_num_summands	mean_sum_value	stdev_sum_value	num_sums_per_nonzero_pendant	mean_right_handedness	stdev_right_handedness	mean_left_handedness	stdev_left_handedness
0	AS010	KFG	27	8	10.392305	3	2	5	3.0	1.000000	22.0	9.695360	0.227273	4.333333	0.577350	-6.5	4.949747
1	AS011	KFG	15	92	183.904867	0	0	0	0.0	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.0	0.000000
2	AS012	KFG	85	2	5.196152	3	4	7	6.0	2.449490	18.0	7.348469	0.233333	10.333333	6.027714	-9.0	5.099020
3	AS013	KFG	90	4	14.456832	0	5	5	14.0	14.456832	48.0	44.508426	0.121951	0.000000	0.000000	-19.0	8.860023
4	AS014	KFG	42	53	40.137264	1	2	3	2.0	0.000000	99.0	7.000000	0.071429	17.000000	0.000000	-15.0	4.242641

Code

# Total the right- and left-handed sum counts over every existing KFG khipu.
total_right_sums = sum(strawmen_kfg_df["num_right_sums"].tolist())
total_left_sums = sum(strawmen_kfg_df["num_left_sums"].tolist())
print(f"{total_right_sums} right sums and {total_left_sums} left sums")
total_sums = total_right_sums + total_left_sums

# Whole-number percentage split; both percentages are 0 when there are no sums at all.
if total_sums > 0:
    left_pct = round(100.0 * float(total_left_sums) / float(total_sums))
    right_pct = round(100.0 * float(total_right_sums) / float(total_sums))
else:
    left_pct = 0
    right_pct = 0

# Mean and standard deviation, across khipus, of each khipu's mean handedness (one decimal).
kfg_left_handedness = strawmen_kfg_df["mean_left_handedness"]
kfg_right_handedness = strawmen_kfg_df["mean_right_handedness"]
left_handed_mean = round(kfg_left_handedness.mean(), 1)
right_handed_mean = round(kfg_right_handedness.mean(), 1)
left_handed_stdev = round(kfg_left_handedness.std(), 1)
right_handed_stdev = round(kfg_right_handedness.std(), 1)

# NOTE: `{total_left_sums=}` is the f-string debug form, so the variable name is echoed in the printed line.
print(f"Existing KFG - Right/Left Distribution = {right_pct}%/{left_pct}% ({total_right_sums}/{total_left_sums=})")
print(f"             - Right/Left Mean Handedness = {right_handed_mean}/{left_handed_mean} ±({right_handed_stdev}/{left_handed_stdev})")

strawmen_kfg_df.describe()

4354 right sums and 3734 left sums
Existing KFG - Right/Left Distribution = 54%/46% (4354/total_left_sums=3734)
             - Right/Left Mean Handedness = 9.9/-8.5 ±(15.0/14.2)

	num_pendants	mean_cord_value	stdev_cord_value	num_right_sums	num_left_sums	num_sums	mean_num_summands	stdev_num_summands	mean_sum_value	stdev_sum_value	num_sums_per_nonzero_pendant	mean_right_handedness	stdev_right_handedness	mean_left_handedness	stdev_left_handedness
count	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000
mean	64.109231	309.835385	496.113591	6.698462	5.744615	12.443077	3.156923	2.032227	146.203077	70.992234	0.160933	9.914679	6.387789	-8.511022	5.777778
std	102.325678	1637.638528	2778.219238	13.630774	12.081363	25.319237	3.620689	3.925252	1052.163975	224.887696	0.179440	15.025712	12.766330	14.176137	11.853944
min	1.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	-123.303371	0.000000
25%	14.000000	5.000000	7.071068	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	-11.555556	0.000000
50%	32.000000	19.500000	26.267844	1.000000	1.000000	2.000000	2.000000	0.000000	25.000000	5.291503	0.105823	5.666667	0.000000	-3.333333	0.000000
75%	77.750000	78.750000	134.914450	6.000000	5.000000	11.750000	5.000000	2.828427	67.000000	41.225340	0.250000	13.925926	7.771955	0.000000	7.681053
max	1650.000000	26324.000000	42206.089632	105.000000	101.000000	200.000000	27.000000	37.094474	24072.000000	3210.316028	0.823009	150.525773	129.388714	0.000000	117.441998

1.2 Strawman Khipus based on a Random Uniform Distribution

Next, we make a dataframe based on randomly generated khipus that has:

The same number of khipus as the KFG
A pendant cord count chosen randomly from the existing khipus’ pendant cord counts
Pendant values that are randomly generated from a uniform distribution.

Code

# Pendant-cord count of every existing khipu; this is the population the random khipus draw their sizes from.
cords_per_khipu = [kfg_khipu.num_pendant_cords() for kfg_khipu in all_khipus]
# Produce a khipu with # of cords sampled from the KFG khipu distribution
def sample_kfg_num_cords(cord_counts=None, min_cords=3):
    """Draw one pendant-cord count at random, raised to at least `min_cords`.

    Parameters:
        cord_counts: sequence of cord counts to sample from. When omitted, the
            module-level `cords_per_khipu` list is used.
        min_cords: smallest count that may be returned; smaller draws are
            replaced by this value ("mutating" trivial khipus). Defaults to 3.

    Returns:
        The sampled cord count, or `min_cords` if the draw was smaller.

    Raises:
        IndexError: if the population is empty (raised by `random.choices`).
    """
    population = cords_per_khipu if cord_counts is None else cord_counts
    num_cords = choices(population, k=1)[0]
    # Mutate trivial khipus: anything below the floor is bumped up to it.
    return max(num_cords, min_cords)

# Generate as many random strawman khipus as there are khipus in the KFG.
num_dummy_khipus = len(all_khipus)

# UNIFORM SAMPLE MATCH #
# Although the maximum cord value in the KFG is 26324, few cords are above 2000,
# so the random cord values below are drawn uniformly from 1 to 2000.
# A sampler over the NON_ZERO cord values of the khipu database is also built here;
# in this cell it is only referenced by the commented-out alternative in the loop.
cord_values = [
    aCord.knotted_value()
    for aKhipu in all_khipus
    for aCord in aKhipu.pendant_cords()
    if aCord.knotted_value() > 0
]
sampler = DiscreteDistributionSampler(cord_values)

random_names = [f"runif_khipu_{i:05d}" for i in range(num_dummy_khipus)]
strawmen_runif_khipu = []
for khipu_name in random_names:
    # The cord count is sampled first, then that many uniform cord values are drawn.
    num_cords = sample_kfg_num_cords()
    random_cords = [random.randint(1, 2000) for _ in range(num_cords)]
    # Alternative: [round(x) for x in sampler.uniform_sample(num_cords)]
    strawmen_runif_khipu.append(StrawmanKhipu(khipu_name, "runif", random_cords))

# One dataframe row per random khipu, using the columns StrawmanKhipu declares.
runif_rows = [strawman.dataframe_tuple() for strawman in strawmen_runif_khipu]
strawmen_runif_df = pd.DataFrame(runif_rows, columns=StrawmanKhipu.dataframe_columns())
strawmen_runif_df.head()

	name	source	num_pendants	mean_cord_value	stdev_cord_value	num_right_sums	num_sums	mean_num_summands	mean_sum_value	num_sums_per_nonzero_pendant	mean_right_handedness
0	runif_khipu_00000	runif	18	989.333333	412.331962	0	0	0.0	0.0	0.000000	0.0
1	runif_khipu_00001	runif	12	945.666667	561.967539	0	0	0.0	0.0	0.000000	0.0
2	runif_khipu_00002	runif	81	1032.975309	588.322764	1	1	2.0	1188.0	0.012346	46.0
3	runif_khipu_00003	runif	15	944.200000	558.606583	0	0	0.0	0.0	0.000000	0.0
4	runif_khipu_00004	runif	24	998.416667	632.610390	0	0	0.0	0.0	0.000000	0.0

Code

# Total the right- and left-handed sum counts over every random uniform khipu.
total_right_sums = sum(strawmen_runif_df["num_right_sums"].tolist())
total_left_sums = sum(strawmen_runif_df["num_left_sums"].tolist())
total_sums = total_right_sums + total_left_sums
print(f"{total_right_sums=} {total_left_sums=}")

# Whole-number percentage split; both percentages are 0 when there are no sums at all.
if total_sums > 0:
    left_pct = round(100.0 * float(total_left_sums) / float(total_sums))
    right_pct = round(100.0 * float(total_right_sums) / float(total_sums))
else:
    left_pct = 0
    right_pct = 0

# Mean and standard deviation, across khipus, of each khipu's mean handedness (one decimal).
runif_left_handedness = strawmen_runif_df["mean_left_handedness"]
runif_right_handedness = strawmen_runif_df["mean_right_handedness"]
left_handed_mean = round(runif_left_handedness.mean(), 1)
right_handed_mean = round(runif_right_handedness.mean(), 1)
left_handed_stdev = round(runif_left_handedness.std(), 1)
right_handed_stdev = round(runif_right_handedness.std(), 1)

# NOTE: `{total_left_sums=}` is the f-string debug form, so the variable name is echoed in the printed line.
print(f"Random Uniform - Right/Left Distribution = {right_pct}%/{left_pct}% ({total_right_sums}/{total_left_sums=})")
print(f"               - Right/Left Mean Handedness = {right_handed_mean}/{left_handed_mean} ±({right_handed_stdev}/{left_handed_stdev})")

strawmen_runif_df.describe()

total_right_sums=1940 total_left_sums=1858
Random Uniform - Right/Left Distribution = 51%/49% (1940/total_left_sums=1858)
               - Right/Left Mean Handedness = 16.6/-15.9 ±(39.8/38.9)

	num_pendants	mean_cord_value	stdev_cord_value	num_right_sums	num_left_sums	num_sums	mean_num_summands	stdev_num_summands	mean_sum_value	stdev_sum_value	num_sums_per_nonzero_pendant	mean_right_handedness	stdev_right_handedness	mean_left_handedness	stdev_left_handedness
count	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000
mean	68.235385	994.092289	569.039461	2.984615	2.858462	5.843077	0.946154	0.026115	609.397333	122.356884	0.021371	16.570869	8.879299	-15.938130	9.126179
std	131.938965	132.460267	83.906330	21.178886	20.374876	41.500664	1.126181	0.173590	726.788281	212.920709	0.037970	39.753211	29.803437	38.901977	29.663151
min	3.000000	424.666667	137.720732	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	-418.686207	0.000000
25%	15.000000	935.419317	543.840313	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	-15.375000	0.000000
50%	32.500000	998.589815	579.969655	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
75%	80.000000	1060.297297	604.767864	1.000000	1.000000	2.000000	2.000000	0.000000	1380.250000	223.711395	0.035609	19.375000	0.000000	0.000000	0.000000
max	1650.000000	1772.750000	899.973889	310.000000	303.000000	600.000000	5.000000	1.732051	1997.000000	1020.502001	0.363636	443.208754	352.748030	0.000000	350.818630

Note that the random uniform sums have a greater mean handedness overall (right/left of 16.6/-15.9, versus 9.9/-8.5 for the existing KFG), and a much greater standard deviation (39.8/38.9 versus 15.0/14.2).

2. Random Uniform vs. Existing KFG - Graphical Distribution

To graphically compare the distributions of the random khipus with those of the existing khipus, the two dataframes are combined into a single dataframe:

Code

def source_color(x):
    """Map a source label to a colour-scale value: 0.0 for "KFG", 1.0 for anything else."""
    if x == "KFG":
        return 0.0
    return 1.0

# Stack the existing-KFG rows on top of the random-uniform rows in a single frame.
combined_kfg_runif_df = pd.concat([strawmen_kfg_df, strawmen_runif_df], axis=0)
# Numeric colour key per row, derived from the row's `source` label.
combined_kfg_runif_df['source_color'] = list(map(source_color, combined_kfg_runif_df["source"].values))

Code

def _pct_of(part, whole):
    """Return `part` as a whole-number percentage of `whole`, or 0 when `whole` is not positive."""
    return round(100.0 * float(part) / float(whole)) if whole > 0 else 0


# Total left/right sum counts for the existing KFG khipus and for the random uniform khipus.
kfg_left = sum(strawmen_kfg_df.num_left_sums.tolist())
kfg_right = sum(strawmen_kfg_df.num_right_sums.tolist())
runif_left = sum(strawmen_runif_df.num_left_sums.tolist())
runif_right = sum(strawmen_runif_df.num_right_sums.tolist())

# Both sources now go through the same zero-guarded helper; previously only the
# random-uniform percentages were protected against a zero total.
pct_kfg_left = _pct_of(kfg_left, kfg_left + kfg_right)
pct_kfg_right = _pct_of(kfg_right, kfg_left + kfg_right)
pct_runif_left = _pct_of(runif_left, runif_left + runif_right)
pct_runif_right = _pct_of(runif_right, runif_left + runif_right)

print(f"Num Right/Left Sums for Existing KFG:{kfg_right}/{kfg_left} ({pct_kfg_right}%/{pct_kfg_left}%)")
print(f"Num Right/Left Sums for Random Uniform: {runif_right}/{runif_left} ({pct_runif_right}%/{pct_runif_left}%)")

Num Right/Left Sums for Existing KFG:4354/3734 (54%/46%)
Num Right/Left Sums for Random Uniform: 1940/1858 (51%/49%)

Code

legend_text = "<b>Random Uniform vs KFG - #Sums:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Summands</i>"
# Log-log scatter of right vs. left sum counts, one marker per khipu.
# Marker size is mean_num_summands; source_color 0.0 (KFG) maps to blue and 1.0 to red.
fig = px.scatter(combined_kfg_runif_df, x="num_right_sums", y="num_left_sums", log_y=True, log_x=True,
                 size="mean_num_summands",
                 opacity=.4,
                 color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
                 labels={"name": "Khipu Name"},
                 hover_data=['name'], title=legend_text,
                 width=944, height=944)
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() returns None, so it is called separately to keep the Figure bound to `fig`.
fig.show()

As expected, most of the random khipus have only a small number of sums. Those sums also have fewer summands. Let’s compare the number of summands for the random khipus vs. the existing khipus.

Code

legend_text = "<b>Random Uniform vs KFG - #Sums vs #Summands:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Sums/Pendant</i>"
# Scatter of mean summands per sum (linear x) vs. number of sums (log y), one marker per khipu.
# Marker size is num_sums_per_nonzero_pendant; source_color 0.0 (KFG) maps to blue and 1.0 to red.
fig = px.scatter(combined_kfg_runif_df, x="mean_num_summands", y="num_sums", log_y=True,
                 size="num_sums_per_nonzero_pendant",
                 opacity=.4,
                 color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
                 labels={"name": "Khipu Name"},
                 hover_data=['name'], title=legend_text,
                 width=944, height=944)
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() returns None, so it is called separately to keep the Figure bound to `fig`.
fig.show()

This echoes the previous statement about the number of summands being very different in the random uniform set! A relatively clear separation occurs.

Code

legend_text = "<b>Random Uniform vs KFG - Sum Handedness:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Summands</i>"
# Scatter of mean left handedness (x) vs. mean right handedness (y), one marker per khipu.
# Marker size is mean_num_summands; source_color 0.0 (KFG) maps to blue and 1.0 to red.
fig = px.scatter(combined_kfg_runif_df, x="mean_left_handedness", y="mean_right_handedness",
                 size="mean_num_summands",
                 opacity=0.3,
                 color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
                 labels={"name": "Khipu Name"},
                 hover_data=['name'], title=legend_text,
                 width=944, height=944)
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() returns None, so it is called separately to keep the Figure bound to `fig`.
fig.show()

Now we’re getting somewhere. Existing KFG khipus have their sums close by (small handedness), for obvious reasons. However, the randomly generated khipus have many more far-away sums (large handedness), each with a small number of summands.

Code

legend_text = "<b>Random Uniform vs KFG - Mean Sum vs #Sums/Pendant:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Pendant</i>"
# Scatter of sums per non-zero pendant (x) vs. mean sum value (log y), one marker per khipu.
# Marker size is num_pendants; source_color 0.0 (KFG) maps to blue and 1.0 to red.
fig = px.scatter(combined_kfg_runif_df, x="num_sums_per_nonzero_pendant", y="mean_sum_value", log_y=True,
                 size="num_pendants",
                 opacity=0.5,
                 color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
                 labels={"name": "Khipu Name"},
                 hover_data=["name", 'num_sums'], title=legend_text,
                 width=944, height=944)
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() returns None, so it is called separately to keep the Figure bound to `fig`.
fig.show()

3. Frequency Distributions

This section examines the frequency distributions of key variables using violin plots, where the width shows the frequency and the height shows the variable being measured.

Code

# Handedness bias per khipu: (#right-handed sums - #left-handed sums).
# Both columns are counts, so no abs() is needed; to_numpy() assigns by position.
combined_kfg_runif_df['handedness_bias'] = (
    combined_kfg_runif_df['num_right_sums'] - combined_kfg_runif_df['num_left_sums']
).to_numpy()
# Give the non-KFG rows a readable legend label.
combined_kfg_runif_df['source'] = ["KFG" if source == 'KFG' else "Random Uniform" for source in combined_kfg_runif_df.source.values.tolist()]
legend_text = "<b>Violin Plot - Random Uniform vs KFG Handedness: (#RightHandedSums - #LeftHandedSums)</b>"
fig = px.violin(combined_kfg_runif_df, y="handedness_bias",
                points='all', color="source",
                hover_data=['name', 'num_sums'], title=legend_text,
                width=944, height=944)
# show() returns None, so it is called separately to keep the Figure bound to `fig`.
fig.show()

Code

# Natural log of each khipu's mean sum value; non-positive means are plotted at 0.
# abs() is dropped because the x > 0 test already guarantees a positive argument.
combined_kfg_runif_df['log_sum_mean'] = [math.log(x) if x > 0 else 0 for x in combined_kfg_runif_df['mean_sum_value'].values.tolist()]
legend_text = "<b>Violin Plot - Random Uniform vs KFG -  Log(Sum Mean)</b>"
fig = px.violin(combined_kfg_runif_df, y="log_sum_mean",
                points='all', color="source",
                labels={"log_sum_mean": "log(Sum Mean)"},
                hover_data=['name', 'num_sums', 'mean_sum_value'], title=legend_text,
                width=944, height=944)
# show() returns None, so it is called separately to keep the Figure bound to `fig`.
fig.show()

Code

# Handedness bias per khipu: (#right-handed sums - #left-handed sums).
# The operands were previously swapped here (left - right), which flipped the sign of the column.
combined_kfg_runif_df['handedness_bias'] = (
    combined_kfg_runif_df['num_right_sums'] - combined_kfg_runif_df['num_left_sums']
).to_numpy()
# Give the non-KFG rows a readable legend label (a no-op if already relabelled).
combined_kfg_runif_df['source'] = ["KFG" if source == 'KFG' else "Random Uniform" for source in combined_kfg_runif_df.source.values.tolist()]
legend_text = "<b>Violin Plot - Random Uniform vs KFG - #Sums per Pendant</b>"
fig = px.violin(combined_kfg_runif_df, y="num_sums_per_nonzero_pendant",
                points='all', color="source",
                labels={"num_sums_per_nonzero_pendant": "#Sums per Pendant"},
                hover_data=['name', 'num_sums', 'mean_sum_value'], title=legend_text,
                width=944, height=944)
# show() returns None, so it is called separately to keep the Figure bound to `fig`.
fig.show()

Code

# Handedness bias per khipu: (#right-handed sums - #left-handed sums).
# The operands were previously swapped here (left - right), which flipped the sign of the column.
combined_kfg_runif_df['handedness_bias'] = (
    combined_kfg_runif_df['num_right_sums'] - combined_kfg_runif_df['num_left_sums']
).to_numpy()
# Give the non-KFG rows a readable legend label (a no-op if already relabelled).
combined_kfg_runif_df['source'] = ["KFG" if source == 'KFG' else "Random Uniform" for source in combined_kfg_runif_df.source.values.tolist()]
legend_text = "<b>Violin Plot - Random Uniform vs KFG - #Summands per Sum</b>"
fig = px.violin(combined_kfg_runif_df, y="mean_num_summands",
                points='all', color="source",
                labels={"mean_num_summands": "#Summands per Sum"},
                hover_data=['name', 'num_sums', 'mean_sum_value'], title=legend_text,
                width=944, height=944)
# show() returns None, so it is called separately to keep the Figure bound to `fig`.
fig.show()

This is also as you would expect - randomly generated khipus have large sums, few summands per sum, and few sums per pendant cord.